from pyquery import PyQuery
from xpinyin import Pinyin
import requests
import pymysql
import re


'''
@Author:echohye
@Description:清洗日期
@Date:2022-03-03, 周四, 11:36
'''
def tran_data(ol: str):
    if '：' in ol:
        ol = ol.split('：')[1]
    ymr = "-".join(ol.split("年")).split("月")
    try:
        if ymr[1] == '':
            return "-22".join(ymr)
        return "-".join(ymr).split("日")[0]
    except IndexError:
        return "2001-3-3"


'''
@Author:echohye
@Description:爬取手机的参数信息
@Date:2022-03-02, 周三, 21:36
'''


def phone_param(phone_name, phone):
    _con = PyQuery(requests.get('https://detail.zol.com.cn/param_copy_{}_blue_1_0_720.html'.format(phone)).text)("#layoutTab>table")
    _index = True
    _cons = list(_con('tr').items())[1:]
    phone_info_dict = {}
    for _item in _cons:
        if _index:
            # print(_item('h3').html())
            phone_info_dict["手机名称"] = phone_name
            _index = False
            continue
        xx = list(_item.items('td'))
        if len(xx) == 2:
            if xx[0].text() in ['保修政策', '质保时间', '质保备注', '客服电话', '电话备注', '详细内容']:
                continue
            if xx[0].text() in ['发布会时间', '上市日期']:
                phone_info_dict[xx[0].text()] = tran_data(xx[1].text())
                continue
            phone_info_dict[xx[0].text()] = xx[1].text()
    # print(phone_info_dict)
    # print(': '.join([t.text() for t in _item('td').items()]))
    return phone_info_dict


'''
@Author:echohye
@Description:获取单个手机品牌的所有手机产品对应id
@Date:2022-03-02, 周三, 22:53
'''


def phone_id(brand_id: str):
    _con = PyQuery(requests.get('https://detail.zol.com.cn/cell_phone_index/subcate57_{}_list_1_0_9_2_0_1.html'.format(brand_id)).text)("#J_PicMode")
    _index = 1
    _phoneDict = {}
    while _con:
        for _item in _con('li').items():
            _phoneId = _item.attr('data-follow-id')
            if _phoneId:
                _phoneName = _item('img').attr('alt').split("（")[0].rstrip()
                if _phoneName not in _phoneDict:
                    _phoneDict[_phoneName] = _phoneId.split('p')[1]
        _index += 1
        _con = PyQuery(requests.get('https://detail.zol.com.cn/cell_phone_index/subcate57_{}_list_1_0_9_2_0_{}.html'.format(brand_id, _index)).text)(
            "#J_PicMode")
    keys = list(_phoneDict.keys())
    values = list(_phoneDict.values())
    keys.reverse()
    values.reverse()
    _phoneDict = dict(zip(keys, values))
    return _phoneDict


'''
@Author:echohye
@Description:获取各手机品牌的id
@Date:2022-03-02, 周三, 23:04
'''


def brands_id():
    _brands = PyQuery(requests.get('https://detail.zol.com.cn/cell_phone_index/subcate57_list_1.html').text)("#J_ParamBrand")
    _brandDict = {}
    _brandNeed = 17
    for _brand in _brands('a').items():
        # 暂时只要这13个品牌
        # 华为: 613  vivo: 1795  OPPO: 1673  苹果: 544  三星: 98  荣耀: 100840  iQOO: 510075
        # 小米: 34645  一加: 35579  魅族: 1434  realme: 55535  努比亚: 310005  红米: 55731
        if not _brandNeed:
            break
        _brandNeed -= 1
        _brandDict[_brand.text()] = re.findall('/cell_phone_index/subcate57_([0-9]+)_list_1.html', _brand.attr('href'))[0]
    return _brandDict


'''
@Author:echohye
@Description:创建表的预编译sql语句
@Date:2022-03-03, 周四, 15:27
'''


def create_table_sql(table_name):
    sql = """
    CREATE TABLE `{}`  (
      `id` bigint(30) NOT NULL AUTO_INCREMENT,
      `品牌` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
      `手机名称` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
      `发布会时间` date NULL DEFAULT NULL,
      `上市日期` date NULL DEFAULT NULL,
      `手机类型` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `机身材质` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `机身颜色` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `指纹识别` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `面部识别` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `长度` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `宽度` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `厚度` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `重量` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `其他外观参数` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `CPU型号` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `CPU频率` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `CPU核心数` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `GPU型号` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `RAM容量` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `RAM存储类型` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `ROM容量` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `ROM存储类型` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `存储卡` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `出厂系统内核` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `操作系统` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `散热` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `振动马达` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `扬声器` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `屏幕尺寸` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `屏幕材质` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `分辨率` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `屏幕比例` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `屏幕类型` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `屏幕刷新率` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `触控刷新率` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `HDR技术` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `对比度` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `屏幕色彩` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `屏幕技术` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `摄像头总数` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `后置摄像头` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `前置摄像头` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `传感器类型` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `传感器型号` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `闪光灯` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `广角` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `视频拍摄` varchar(1024) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `网络类型` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `网络频段` varchar(1024) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `SIM卡类型` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `WLAN功能` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `定位导航` varchar(512) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `蓝牙` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `NFC` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `红外功能` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `连接与共享` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `机身接口` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `其他网络参数` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `电池类型` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `电池容量` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `有线充电` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `感应器` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `游戏功能` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      `包装清单` varchar(512) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
      PRIMARY KEY (`id`) USING BTREE
    ) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;
    """.format(table_name)
    return sql


'''
@Author:echohye
@Description:创建对应品牌的数据表并写入数据
@Date:2022-03-03, 周四, 00:12
'''


def save_data(brand_id_dict:dict):
    # 打开数据库连接
    db = pymysql.connect(
        user='root',  # 填写数据库用户
        password='zhy123',  # 填写数据库密码
        host='localhost',
        database='echohye_phone',  # 填写数据库名称，需要自己新建好
        port=3306,
    )

    table_name = 'all_phones'

    # 使用 cursor() 方法创建一个游标对象 cursor
    cursor = db.cursor()

    # 使用 execute() 方法执行 SQL，如果表存在则删除
    cursor.execute("DROP TABLE IF EXISTS {}".format(table_name))

    # 使用预处理语句创建表,使用 execute()  方法执行 SQL 查询
    cursor.execute(create_table_sql(table_name))
    for brand_name, brand_id in brand_id_dict.items():
        print("------>>> " + brand_name)
        phone_id_dict = phone_id(brand_id)

        for _phone_name, _phone_id in phone_id_dict.items():
            phone_info_dict = phone_param(_phone_name, _phone_id)
            print(_phone_name)
            # 执行插入数据
            data_sql = """
            INSERT INTO `echohye_phone`.`{}`(
            `品牌`, `手机名称`, `发布会时间`, `上市日期`, `手机类型`, `机身材质`, `机身颜色`, `指纹识别`, `面部识别`, `长度`, `宽度`, 
            `厚度`, `重量`, `其他外观参数`, `CPU型号`, `CPU频率`, `CPU核心数`, `GPU型号`, `RAM容量`, `RAM存储类型`, 
            `ROM容量`, `ROM存储类型`, `存储卡`, `出厂系统内核`, `操作系统`, `散热`, `振动马达`, `扬声器`, `屏幕尺寸`, 
            `屏幕材质`, `分辨率`, `屏幕比例`, `屏幕类型`, `屏幕刷新率`, `触控刷新率`, `HDR技术`, `对比度`, `屏幕色彩`, 
            `屏幕技术`, `摄像头总数`, `后置摄像头`, `前置摄像头`, `传感器类型`, `传感器型号`, `闪光灯`, `广角`, 
            `视频拍摄`, `网络类型`, `网络频段`, `SIM卡类型`, `WLAN功能`, `定位导航`, `蓝牙`, `NFC`, `红外功能`, `连接与共享`, 
            `机身接口`, `其他网络参数`, `电池类型`, `电池容量`, `有线充电`, `感应器`, `游戏功能`, `包装清单`
            ) 
            VALUES (
            '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', 
            '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', 
            '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', 
            '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}',
            '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', 
            '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', 
            '{}', '{}', '{}', '{}'
            )
            """.format(
                table_name, brand_name,
                phone_info_dict.get('手机名称'), phone_info_dict.get('发布会时间', '2001-3-3'), phone_info_dict.get('上市日期', '2001-3-3'),
                phone_info_dict.get('手机类型', ''), phone_info_dict.get('机身材质', ''), phone_info_dict.get('机身颜色', ''),
                phone_info_dict.get('指纹识别', ''), phone_info_dict.get('面部识别', ''), phone_info_dict.get('长度', ''),
                phone_info_dict.get('宽度', ''), phone_info_dict.get('厚度', ''), phone_info_dict.get('重量', ''),
                phone_info_dict.get('其他外观参数', ''), phone_info_dict.get('CPU型号', ''), phone_info_dict.get('CPU频率', ''),
                phone_info_dict.get('CPU核心数', ''), phone_info_dict.get('GPU型号', ''), phone_info_dict.get('RAM容量', ''),
                phone_info_dict.get('RAM存储类型', ''), phone_info_dict.get('ROM容量', ''), phone_info_dict.get('ROM存储类型', ''),
                phone_info_dict.get('存储卡', ''), phone_info_dict.get('出厂系统内核', ''), phone_info_dict.get('操作系统', ''),
                phone_info_dict.get('散热', ''), phone_info_dict.get('振动马达', ''), phone_info_dict.get('扬声器', ''),
                phone_info_dict.get('屏幕尺寸', ''), phone_info_dict.get('屏幕材质', ''), phone_info_dict.get('分辨率', ''),
                phone_info_dict.get('屏幕比例', ''), phone_info_dict.get('屏幕类型', ''), phone_info_dict.get('屏幕刷新率', ''),
                phone_info_dict.get('触控刷新率', ''), phone_info_dict.get('HDR技术', ''), phone_info_dict.get('对比度', ''),
                phone_info_dict.get('屏幕色彩', ''), phone_info_dict.get('屏幕技术', ''), phone_info_dict.get('摄像头总数', ''),
                phone_info_dict.get('后置摄像头', ''), phone_info_dict.get('前置摄像头', ''), phone_info_dict.get('传感器类型', ''),
                phone_info_dict.get('传感器型号', ''), phone_info_dict.get('闪光灯', ''), phone_info_dict.get('广角', ''),
                phone_info_dict.get('视频拍摄', ''), phone_info_dict.get('网络类型', ''), phone_info_dict.get('网络频段', ''),
                phone_info_dict.get('SIM卡类型', ''), phone_info_dict.get('WLAN功能', ''), phone_info_dict.get('定位导航', ''),
                phone_info_dict.get('蓝牙', ''), phone_info_dict.get('NFC', ''), phone_info_dict.get('红外功能', ''),
                phone_info_dict.get('连接与共享', ''), phone_info_dict.get('机身接口', ''), phone_info_dict.get('其他网络参数', ''),
                phone_info_dict.get('电池类型', ''), phone_info_dict.get('电池容量', ''), phone_info_dict.get('有线充电', ''),
                phone_info_dict.get('感应器', ''), phone_info_dict.get('游戏功能', ''), phone_info_dict.get('包装清单', ''),
            )
            try:
                cursor.execute(data_sql)
                db.commit()
            except Exception as e:
                print(table_name, e.args)
                db.rollback()
        # 关闭数据库连接
    db.close()


if __name__ == '__main__':
    brandIds = brands_id()
    # print(brandIds)
    # {'华为': '613', 'vivo': '1795', 'OPPO': '1673', '苹果': '544', '三星': '98', '荣耀': '50840', 'iQOO': '55075', '小米': '34645', '一加': '35579',
    # '魅族': '1434', 'realme': '55535', '努比亚': '35005', '红米': '55731', 'Moto': '295', '中兴': '642', '联想': '1763', '黑鲨': '53765'}
    save_data(brandIds)
